Load necessary packages
library(caret) # Classification and Regression Training
## Warning: package 'caret' was built under R version 3.5.2
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.2
library(corrplot)
## corrplot 0.84 loaded
library(dplyr) # A Grammar of Data Manipulations
## Warning: package 'dplyr' was built under R version 3.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats) # Tools for Working with Categorical Variables (Factors)
## Warning: package 'forcats' was built under R version 3.5.2
library(gdata) # Various R Programming Tools for Data Manipulation
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
## The following objects are masked from 'package:dplyr':
##
## combine, first, last
## The following object is masked from 'package:stats':
##
## nobs
## The following object is masked from 'package:utils':
##
## object.size
## The following object is masked from 'package:base':
##
## startsWith
library(ggplot2) # Create Elegant Data Visualizations Using the Grammar of Graphics
library(ggrepel) # Automatically Position Non-Overlapping Text Labels with ‘ggplot2’
## Warning: package 'ggrepel' was built under R version 3.5.2
library(ggridges) # Ridgeline Plots in ‘ggplot2’
##
## Attaching package: 'ggridges'
## The following object is masked from 'package:ggplot2':
##
## scale_discrete_manual
library(ggthemes) # Extra Themes, Scales, and Geoms for ‘ggplot2’
## Warning: package 'ggthemes' was built under R version 3.5.2
library(gmodels) # Various R Programming Tools for Model Fitting
library(grid) # The Grid Graphics Package
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:gdata':
##
## combine
## The following object is masked from 'package:dplyr':
##
## combine
library(knitr) # A General-Purpose Package for Dynamic Report Generation in R
## Warning: package 'knitr' was built under R version 3.5.2
library(lmtest) # Testing Linear Regression Models
## Warning: package 'lmtest' was built under R version 3.5.2
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(plyr) # Tools for Splitting, Applying and Combining Data
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(purrr) # Functional Programming Tools
## Warning: package 'purrr' was built under R version 3.5.2
##
## Attaching package: 'purrr'
## The following object is masked from 'package:plyr':
##
## compact
## The following object is masked from 'package:gdata':
##
## keep
## The following object is masked from 'package:caret':
##
## lift
library(readr) # Read Rectangular Text Data
library(rpart)
## Warning: package 'rpart' was built under R version 3.5.2
library(rattle)
## Rattle: A free graphical interface for data science with R.
## Version 5.2.0 Copyright (c) 2006-2018 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(scales) # Scale Functions for Visualization
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
## The following object is masked from 'package:purrr':
##
## discard
library(shiny) # Web Application Framework for R
## Warning: package 'shiny' was built under R version 3.5.2
library(skimr) # Compact and Flexible Summaries of Data
## Warning: package 'skimr' was built under R version 3.5.2
##
## Attaching package: 'skimr'
## The following object is masked from 'package:knitr':
##
## kable
## The following object is masked from 'package:stats':
##
## filter
library(stringr) # Simple, Consistent Wrappers for Common String Operations
library(tibble) # Simple Data Frames
## Warning: package 'tibble' was built under R version 3.5.2
library(tidyr) # Easily Tidy Data with ‘spread()’ and ‘gather()’ Functions
## Warning: package 'tidyr' was built under R version 3.5.2
library(tidyverse) # Install and Load ‘Tidyverse’ Packages
library(usmap)
## Warning: package 'usmap' was built under R version 3.5.2
library(rworldmap)
## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.5.2
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
## The following object is masked from 'package:plyr':
##
## ozone
library(mapdata)
library(cowplot)
## Warning: package 'cowplot' was built under R version 3.5.2
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggmap':
##
## theme_nothing
## The following object is masked from 'package:ggthemes':
##
## theme_map
library("googleway")
library("ggspatial")
library("rnaturalearth")
library("rnaturalearthdata")
theme_set(theme_bw())
library("sf")
## Warning: package 'sf' was built under R version 3.5.2
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(gridExtra)
world <- ne_countries(scale = "medium", returnclass = "sf") #for mapping
Load cleaned data
clean_data <- readRDS("../../data/processed_data/processeddata.rds")
skim(clean_data)
## Skim summary statistics
## n obs: 13165
## n variables: 23
##
## ── Variable type:factor ────────────────────────────────────────────────────────
## variable missing complete n n_unique
## bioregion 0 13165 13165 6
## georegion 0 13165 13165 9
## group_code 0 13165 13165 9
## group_code_UCSC_other 0 13165 13165 2
## island 0 13165 13165 6
## marine_season_code 0 13165 13165 57
## marine_site_name 0 13165 13165 77
## method_code 0 13165 13165 4
## method_code_IP_other 0 13165 13165 2
## mpa_region 0 13165 13165 6
## season_name 0 13165 13165 4
## site_code 0 13165 13165 77
## species_code 0 13165 13165 3
## state 0 13165 13165 4
## top_counts ordered
## San: 5608, Oly: 4138, Gov: 2366, Sal: 652 FALSE
## CA : 5382, CA : 2592, CA : 1660, OR: 1386 FALSE
## UCS: 9657, UCL: 2032, PBN: 400, SSS: 366 FALSE
## UCS: 9657, Oth: 3508, NA: 0 FALSE
## Mai: 12364, Bar: 279, Sad: 250, Hat: 150 FALSE
## SU1: 440, SU1: 403, SU1: 392, FA1: 376 FALSE
## Mil: 524, Sco: 513, Sti: 472, End: 468 FALSE
## IP: 12012, BT2: 782, GSE: 336, TS3: 35 FALSE
## IP: 12012, Oth: 1153, NA: 0 FALSE
## Cen: 5382, Sou: 2627, NUL: 1932, Nor: 1660 FALSE
## Sum: 4552, Fal: 4489, Spr: 4112, Win: 12 FALSE
## MCR: 524, SCT: 513, SWC: 472, END: 468 FALSE
## P.o: 11416, K FALSE
## CA: 10548, OR: 1386, WA: 865, AK: 366 FALSE
##
## ── Variable type:integer ───────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25
## marine_common_season 0 13165 13165 116.82 19.23 78 101
## marine_common_year 0 13165 13165 2009.7 4.81 2000 2006
## marine_sort_order 0 13165 13165 5566.24 1125.15 1720 5020
## season_sequence 0 13165 13165 2.03 0.81 1 1
## size_bin 0 13165 13165 82.44 42.2 5 50
## size_sort_order 0 13165 13165 9.57 4.15 1 6
## total 0 13165 13165 9.92 16.43 1 2
## p50 p75 p100 hist
## 118 131 152 ▅▅▆▇▇▇▇▅
## 2010 2013 2018 ▃▅▅▇▇▆▆▆
## 6090 6370 7650 ▁▁▁▂▃▃▇▁
## 2 3 4 ▇▁▇▁▁▇▁▁
## 80 110 320 ▅▇▇▃▁▁▁▁
## 10 12 33 ▅▇▇▃▁▁▁▁
## 4 11 360 ▇▁▁▁▁▁▁▁
##
## ── Variable type:numeric ───────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50
## latitude 0 13165 13165 38.67 5.2 32.87 34.73 36.62
## longitude 0 13165 13165 -122.25 2.86 -135.38 -123.98 -121.94
## p75 p100 hist
## 41.65 57.05 ▇▆▃▂▁▁▁▁
## -120.62 -117.25 ▁▁▁▁▅▇▆▃
unique_sites <- subset(clean_data, !duplicated(clean_data$site_code))
#names(unique_sites)
sites_coordinates <- unique_sites[c(2,5:6,23)]
sites_coordinates_contig <- filter(sites_coordinates, sites_coordinates$state != "Alaska")
#sites_coordinates
CA_coordinates <- filter(sites_coordinates, sites_coordinates$state == "CA")
OR_coordinates <- filter(sites_coordinates, sites_coordinates$state == "OR")
WA_coordinates <- filter(sites_coordinates, sites_coordinates$state == "WA")
AK_coordinates <- filter(sites_coordinates, sites_coordinates$state == "AK")
CA_coordinates <- as.data.frame(CA_coordinates)
OR_coordinates <- as.data.frame(OR_coordinates)
WA_coordinates <- as.data.frame(WA_coordinates)
AK_coordinates <- as.data.frame(AK_coordinates)
After messing around with several different mapping tools in R, I decided that trying to display the whole range (CA to AK) on one map would be really hard, and the Alaska sites are really just at one specific location, so I’m going to split Alaska off from the three states that are part of the contiguous US
Alaska:
world <- ne_countries(scale = "medium", returnclass = "sf")
AK_map <- ggplot(data = world) +
geom_sf() +
geom_point(data= sites_coordinates,aes(x=sites_coordinates$longitude, y=sites_coordinates$latitude,), size = 4, alpha = 0.4, color ="orange") +
annotation_scale(location = "bl", width_hint = 0.3) +
coord_sf(xlim = c(-168, -131), ylim = c(51, 73), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude") +
geom_text(aes(x = -151, y = 66, label = "ALASKA", size = 600)) +
geom_text(aes(x = -149.9, y = 61.4, label = "*", size = 600)) +
geom_text(aes(x = -154, y = 62.6, label = "Anchorage")) +
scale_x_continuous(breaks = c(-160, -150, -140)) +
theme(legend.position = "none")
AK_map
## Scale on map varies by more than 10%, scale bar may be inaccurate
ggsave(filename = "../../results/Exploratory_Analysis_results/AK_sites_map.png",plot = AK_map, width = 3.5, height = 4)
## Scale on map varies by more than 10%, scale bar may be inaccurate
State <- sites_coordinates_contig$state
CA_map <- ggplot(data = world) +
geom_sf() +
geom_jitter(data= sites_coordinates_contig,aes(x=sites_coordinates_contig$longitude, y=sites_coordinates_contig$latitude,col=State), size = 3, alpha = 0.6, width = 0.2) +
annotation_scale(location = "bl", width_hint = 0.3) +
coord_sf(xlim = c(-126, -116), ylim = c(32, 42.4), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude")+
geom_text(aes(x = -122.7, y = 37.7, label = ">")) +
geom_text(aes(x = -123.5, y = 37.6, label = "San ")) +
geom_text(aes(x = -124.2, y = 37.1, label = "Francisco")) +
geom_text(aes(x = -120, y = 39, label = "CALIFORNIA")) +
geom_text(aes(x = -118.9, y = 33.7, label = ">")) +
geom_text(aes(x = -120.9, y = 33.6, label = "Los Angeles")) +
theme(legend.position = "none") +
scale_x_continuous(breaks = c(-124, -122, -120, -118))
CA_map
## Scale on map varies by more than 10%, scale bar may be inaccurate
ggsave(filename = "../../results/Exploratory_Analysis_results/CA_sites_map.png",plot = CA_map, width = 3.5, height = 4)
## Scale on map varies by more than 10%, scale bar may be inaccurate
OR_map <- ggplot(data = world) +
geom_sf() +
geom_jitter(data= sites_coordinates_contig,aes(x=sites_coordinates_contig$longitude, y=sites_coordinates_contig$latitude,col=State), size = 3, alpha = 0.6, width = 0.2) +
annotation_scale(location = "br", width_hint = 0.3) +
coord_sf(xlim = c(-127.5, -120.8), ylim = c(41.5, 47.5), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude") +
geom_text(aes(x = -122.4, y = 44, label = "OREGON")) +
geom_text(aes(x = -122.8, y = 45.6, label = "*")) +
geom_text(aes(x = -122.8, y = 45.4, label = "Portland")) +
theme(legend.position = "none") +
scale_x_continuous(breaks = c(-126, -124, -122))
OR_map
## Scale on map varies by more than 10%, scale bar may be inaccurate
ggsave(filename = "../../results/Exploratory_Analysis_results/OR_sites_map.png",plot = OR_map, width = 3.5, height = 4)
## Scale on map varies by more than 10%, scale bar may be inaccurate
WA_map <- ggplot(data = world) +
geom_sf() +
geom_jitter(data= sites_coordinates_contig,aes(x=sites_coordinates_contig$longitude, y=sites_coordinates_contig$latitude,col=State), size = 3, alpha = 0.6, width = 0.2) +
annotation_scale(location = "br", width_hint = 0.3) +
coord_sf(xlim = c(-125.2, -121.3), ylim = c(45.8, 49.1), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude") +
geom_text(aes(x = -122.5, y = 46.7, label = "WASHINGTON")) +
geom_text(aes(x = -123.7, y = 47.93, label = "Olympic")) +
geom_text(aes(x = -123.7, y = 47.75, label = "Peninsula")) +
geom_text(aes(x = -121.9, y = 47.75, label = "< Seattle")) +
theme(legend.position = "none") +
scale_x_continuous(breaks = c(-124, -123, -122))
WA_map
ggsave(filename = "../../results/Exploratory_Analysis_results/WA_sites_map.png",plot = WA_map, width = 3.5, height = 4)
clean_data %>% ggplot() +
geom_histogram(aes(total))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
CA <- clean_data %>% select(marine_site_name, marine_common_year, size_bin, total, state) %>%
filter(state == "CA")
summary(CA)
## marine_site_name marine_common_year size_bin
## Mill Creek : 524 Min. :2000 Min. : 5.00
## Scott Creek : 513 1st Qu.:2005 1st Qu.: 50.00
## Stillwater : 472 Median :2009 Median : 80.00
## Enderts : 468 Mean :2009 Mean : 81.92
## Terrace Point: 447 3rd Qu.:2013 3rd Qu.:110.00
## Andrew Molera: 443 Max. :2018 Max. :230.00
## (Other) :7681
## total state
## Min. : 1.000 AK: 0
## 1st Qu.: 2.000 CA:10548
## Median : 5.000 OR: 0
## Mean : 9.607 WA: 0
## 3rd Qu.: 11.000
## Max. :360.000
##
OR <- clean_data %>% select(marine_site_name, marine_common_year, size_bin, total, state) %>%
filter(state == "OR")
summary(OR)
## marine_site_name marine_common_year size_bin
## Fogarty Creek:351 Min. :2000 Min. : 5.00
## Bob Creek :334 1st Qu.:2005 1st Qu.: 50.00
## Burnt Hill :260 Median :2010 Median : 80.00
## Ecola :228 Mean :2010 Mean : 87.21
## Cape Arago :213 3rd Qu.:2014 3rd Qu.:120.00
## Alegria : 0 Max. :2018 Max. :230.00
## (Other) : 0
## total state
## Min. : 1.00 AK: 0
## 1st Qu.: 2.00 CA: 0
## Median : 7.00 OR:1386
## Mean : 15.28 WA: 0
## 3rd Qu.: 18.00
## Max. :212.00
##
WA <- clean_data %>% select(marine_site_name, marine_common_year, size_bin, total, state) %>%
filter(state == "WA")
summary(WA)
## marine_site_name marine_common_year size_bin
## Saddlebag North Cove:163 Min. :2008 Min. : 5.00
## Post Point :151 1st Qu.:2011 1st Qu.: 50.00
## Hat Island West :116 Median :2014 Median : 90.00
## Point Grenville :111 Mean :2014 Mean : 89.26
## Kydikabbit Point :102 3rd Qu.:2016 3rd Qu.:120.00
## Saddlebag South East: 87 Max. :2018 Max. :320.00
## (Other) :135
## total state
## Min. : 1.000 AK: 0
## 1st Qu.: 1.000 CA: 0
## Median : 2.000 OR: 0
## Mean : 6.776 WA:865
## 3rd Qu.: 5.000
## Max. :133.000
##
AK <- clean_data %>% select(marine_site_name, marine_common_year, size_bin, total, state) %>%
filter(state == "AK")
summary(AK)
## marine_site_name marine_common_year size_bin
## Sage Rock :148 Min. :2011 Min. : 5.00
## Pirates Cove :131 1st Qu.:2013 1st Qu.: 40.00
## Kayak Island : 87 Median :2015 Median : 60.00
## Alegria : 0 Mean :2015 Mean : 63.51
## Andrew Molera: 0 3rd Qu.:2017 3rd Qu.: 87.50
## Arroyo Hondo : 0 Max. :2018 Max. :190.00
## (Other) : 0
## total state
## Min. : 1.00 AK:366
## 1st Qu.: 1.00 CA: 0
## Median : 3.00 OR: 0
## Mean : 6.03 WA: 0
## 3rd Qu.: 7.00
## Max. :117.00
##
[INSERT TABLE SUMMARIZING THIS INFORMATION]
#clean_data %>% ggplot(aes(marine_common_year, total)) + geom_jitter(width = 0.15, alpha = 0.25)
clean_data %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) +
geom_boxplot(alpha = 0.4) +
xlab("Year") +
ylab("log(Abundance by Site)")
#clean_data %>% ggplot() + geom_density(aes(total, fill = marine_common_year))
clean_data %>% ggplot(aes(marine_common_year, total, col=state)) + geom_jitter(width = 0.15, alpha = 0.25)
Woah, California is dominating this visualization.
clean_data %>%
ggplot(aes(marine_common_year, total, col=state)) + geom_jitter(width = 0.15, alpha = 0.25) +
facet_wrap(~state)
Washington and Alaska clearly weren’t surveyed for all years during which California and Oregon were.
Abundance_State_Year_graphs <- clean_data %>%
ggplot(aes(marine_common_year, log(total), col=state)) + geom_jitter(width = 0.15, alpha = 0.25) +
facet_wrap(~state) +
xlab("Year") + ylab("log(Abundance)")
Abundance_State_Year_graphs
ggsave(filename = "../../results/Processing_results/Abundance_State_Year_graphs.png",plot = Abundance_State_Year_graphs, width = 5, height = 4)
```
clean_data %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) +
geom_boxplot(alpha = 0.4, fill = "grey67") +
xlab("Year") +
ylab("log(Abundance)")
P.ochra_only <- filter(clean_data, species_code == "P.ochraceus")
K.tuni_only <- filter(clean_data, species_code == "K.tunicata")
E.tros_only <- filter(clean_data, species_code == "E.troschelii")
#skim(P.ochra_only)
#skim(K.tuni_only)
#skim(E.tros_only)
P.ochra_only %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) +
geom_boxplot(fill = "orchid4", alpha = 0.4) +
xlab("Year") +
ylab("log(P.ochraceus Abundance)")
K.tuni_only %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) +
geom_boxplot(fill = "orange3", alpha = 0.4) +
xlab("Year") +
ylab("log(K.tunicata Abundance)")
E.tros_only %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) + geom_boxplot(fill = "skyblue3", alpha = 0.4) +
xlab("Year") +
ylab("log(E.troschelii Abundance)")
That last one looked like the log() function might not be applicable. Let’s see what it looks like without that:
E.tros_only %>% ggplot(aes(marine_common_year, total, group = marine_common_year)) + geom_boxplot(fill = "skyblue3", alpha = 0.4) +
xlab("Year") +
ylab("log(E.troschelii Abundance)")
The most recent and catastrophic sea star wasting disease outbreak hit the west coast of North America around 2013. How did sea star abundance look before the epidemic, how did it change during the epidemic, and have the populations rebounded?
last_decade <- filter(clean_data, marine_common_year >= "2010")
last_decade %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) +
geom_boxplot(alpha = 0.4, fill = "grey67") +
xlab("Year") +
ylab("log(Abundance)")
It looks like populations leveled back out at 2016 and remained relatively unchanged in 2017 and 2018.
It looks like theres a detectable decrease between 2013 and 2014, and that populations leveled back out at 2016 and remained relatively unchanged in 2017 and 2018. Lets narrow the time frame to only include sampling between 2013 and 2016.
last_decade$marine_common_year <- as.factor(last_decade$marine_common_year)
SSWD_timeframe <- filter(clean_data, marine_common_year >= "2013" & marine_common_year <= "2016")
SSWD_timeframe$marine_common_year <- as.factor(SSWD_timeframe$marine_common_year)
SSWD_timeframe %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) + geom_boxplot(alpha = 0.4, fill = "grey67") +
xlab("Year") +
ylab("log(Abundance)")
P.ochra_SSWD_tf <- filter(SSWD_timeframe, species_code == "P.ochraceus")
K.tuni_SSWD_tf <- filter(SSWD_timeframe, species_code == "K.tunicata")
E.tros_SSWD_tf <- filter(SSWD_timeframe, species_code == "E.troschelii")
#skim(P.ochra_SSWD_tf)
#skim(K.tuni_SSWD_tf)
#skim(E.tros_SSWD_tf)
P.ochra_SSWD_tf %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) +
geom_boxplot(fill = "orchid4", alpha = 0.4) +
xlab("Year") +
ylab("log(P.ochraceus Abundance)")
K.tuni_SSWD_tf %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) +
geom_boxplot(fill = "orange3", alpha = 0.4) +
xlab("Year") +
ylab("log(K.tunicata Abundance)")
E.tros_SSWD_tf %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) + geom_boxplot(fill = "skyblue3", alpha = 0.4) +
xlab("Year") +
ylab("log(E.troschelii Abundance)")
P.ochra_SSWD_tf_fig <- P.ochra_SSWD_tf %>% ggplot(aes(marine_common_season, log(total), group = marine_common_season)) +
geom_boxplot(fill = "orchid4", alpha = 0.4) +
xlab("SeasonYear") +
ylab("log(P.ochra Abundance)") +
scale_x_continuous(breaks = c("Sp13" = 129, "Sm13" = 130, "Fl13" = 131, "Sp14" = 133, "Sm14" = 134, "Fl14" = 135, "Sp15" = 137, "Sm15" = 138, "Fl15" = 139, "Sp16" = 141, "Sm16" = 142, "Fl16" = 143))
P.ochra_SSWD_tf_fig
# Used to code above x-axis
#numeric_seasons_code_key <- readRDS("../../data/processed_data/numeric_seasons_code_key.rds")
#numeric_seasons_code_key
K.tuni_SSWD_tf_fig <- K.tuni_SSWD_tf %>% ggplot(aes(marine_common_season, log(total), group = marine_common_season)) +
geom_boxplot(fill = "orange3", alpha = 0.4) +
xlab("SeasonYear") +
ylab("log(K.tuni Abundance)") +
scale_x_continuous(breaks = c("Sp13" = 129, "Sm13" = 130, "Fl13" = 131, "Sp14" = 133, "Sm14" = 134, "Fl14" = 135, "Sp15" = 137, "Sm15" = 138, "Fl15" = 139, "Sp16" = 141, "Sm16" = 142, "Fl16" = 143))
K.tuni_SSWD_tf_fig
E.tros_SSWD_tf_fig <- E.tros_SSWD_tf %>% ggplot(aes(marine_common_season, log(total), group = marine_common_season)) +
geom_boxplot(fill = "skyblue3", alpha = 0.4) +
xlab("SeasonYear") +
ylab("log(E.tros Abundance)") +
scale_x_continuous(breaks = c("Sp13" = 129, "Sm13" = 130, "Fl13" = 131, "Sp14" = 133, "Sm14" = 134, "Fl14" = 135, "Sp15" = 137, "Sm15" = 138, "Fl15" = 139, "Sp16" = 141, "Sm16" = 142, "Fl16" = 143))
E.tros_SSWD_tf_fig
grid.arrange(P.ochra_SSWD_tf_fig, K.tuni_SSWD_tf_fig, nrow=2)
P.ochra_SSWD_tf_fig + facet_wrap(~ state)
P.ochra_SSWDtf_CA <- filter(P.ochra_SSWD_tf, state == "CA")
P.ochra_SSWDtf_CA_fig <- P.ochra_SSWDtf_CA %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) +
geom_boxplot(fill = "orchid4", alpha = 0.4) +
xlab("Year") +
ylab("log(P.ochra Abundance)")
P.ochra_SSWDtf_CA_fig <- P.ochra_SSWDtf_CA_fig + facet_wrap(~ marine_sort_order)
ggsave(filename = "../../results/Exploratory_Analysis_results/P.ochra_SSWDtf_CA_fig.png",plot = P.ochra_SSWDtf_CA_fig, width = 5, height = 4)
P.ochra_SSWDtf_CA %>% ggplot2::ggplot() + geom_histogram(aes(P.ochra_SSWDtf_CA$total)) + facet_wrap(~ marine_common_year, 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
P.ochra_SSWDtf_highcount_temp <- P.ochra_SSWD_tf %>% filter(marine_common_year == "2013" & total >= 10)
unique(P.ochra_SSWDtf_highcount_temp$site_code)
## [1] BML BOA BOB BRN CARP CRCO DAPT DMN END ESP FKC FOG GREN HOP
## [15] KIB MCR MEN MOL MUSH OCC OLDS PIRA POP SAGE SBSE SCT SEA SHCO
## [29] SWC TPT TRIS WHPT
## 77 Levels: ALEG ARG ARHO ARN BML BOA BOB BOD BRN CARP CAY CHM ... WIN
P.ochra_SSWDtf_highcount <- P.ochra_SSWD_tf %>% filter(site_code == "BML" | site_code == "BOA" | site_code == "BOB" | site_code == "BRN" | site_code == "CARP" | site_code == "CRCO" | site_code == "DAPT" | site_code == "DMN" | site_code == "END" | site_code == "ESP" | site_code == "FKC" | site_code == "FOG" | site_code == "GREN" | site_code == "HOP" | site_code == "KIB" | site_code == "MCR" | site_code == "MEN" | site_code == "MOL" | site_code == "MUSH" | site_code == "OCC" | site_code == "OLDS" | site_code == "PIRA" | site_code == "POP" | site_code == "SAGE" | site_code == "SBSE" | site_code == "SCT" | site_code == "SEA" | site_code == "SHCO" | site_code == "SWC" | site_code == "TPT" | site_code == "TRIS" | site_code == "WHPT")
head(P.ochra_SSWDtf_highcount)
## group_code site_code marine_site_name marine_sort_order latitude
## 1 UCSC BML Bodega 5200 38.3182
## 2 UCSC BML Bodega 5200 38.3182
## 3 UCSC BML Bodega 5200 38.3182
## 4 UCSC BML Bodega 5200 38.3182
## 5 UCSC BML Bodega 5200 38.3182
## 6 UCSC BML Bodega 5200 38.3182
## longitude marine_common_season marine_season_code marine_common_year
## 1 -123.0737 130 SU13 2013
## 2 -123.0737 130 SU13 2013
## 3 -123.0737 130 SU13 2013
## 4 -123.0737 130 SU13 2013
## 5 -123.0737 130 SU13 2013
## 6 -123.0737 130 SU13 2013
## season_sequence season_name method_code species_code size_sort_order
## 1 2 Summer IP P.ochraceus 3
## 2 2 Summer IP P.ochraceus 4
## 3 2 Summer IP P.ochraceus 5
## 4 2 Summer IP P.ochraceus 6
## 5 2 Summer IP P.ochraceus 7
## 6 2 Summer IP P.ochraceus 8
## size_bin total mpa_region georegion bioregion
## 1 20 1 North Central Coast CA North Central OlyCstWA_2_SanFran
## 2 30 2 North Central Coast CA North Central OlyCstWA_2_SanFran
## 3 40 1 North Central Coast CA North Central OlyCstWA_2_SanFran
## 4 50 1 North Central Coast CA North Central OlyCstWA_2_SanFran
## 5 60 5 North Central Coast CA North Central OlyCstWA_2_SanFran
## 6 70 15 North Central Coast CA North Central OlyCstWA_2_SanFran
## island group_code_UCSC_other method_code_IP_other state
## 1 Mainland UCSC IP CA
## 2 Mainland UCSC IP CA
## 3 Mainland UCSC IP CA
## 4 Mainland UCSC IP CA
## 5 Mainland UCSC IP CA
## 6 Mainland UCSC IP CA
P.ochra_SSWDtf_highcount %>% ggplot2::ggplot() + geom_histogram(aes(P.ochra_SSWDtf_highcount$total)) + facet_wrap(~ marine_common_year, 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
P.ochra_SSWDtf_highcount_fig <- P.ochra_SSWDtf_highcount %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) +
geom_boxplot(fill = "orchid4", alpha = 0.4) +
xlab("Year") +
ylab("log(P.ochra Abundance)")
P.ochra_SSWDtf_highcount_fig <- P.ochra_SSWDtf_highcount_fig + facet_wrap(~ marine_sort_order)
P.ochra_SSWDtf_highcount_fig
Over 25
P.ochra_SSWDtf_highercount_temp <- P.ochra_SSWD_tf %>% filter(marine_common_year == "2013" & total >= 25)
unique(P.ochra_SSWDtf_highercount_temp$site_code)
## [1] BOB BRN CRCO DMN END FKC FOG GREN MCR MOL MUSH OCC PIRA SCT
## [15] SEA SHCO SWC TRIS
## 77 Levels: ALEG ARG ARHO ARN BML BOA BOB BOD BRN CARP CAY CHM ... WIN
P.ochra_SSWDtf_over25 <- P.ochra_SSWD_tf %>% filter(site_code == "BOB" | site_code == "BRN" | site_code == "CRCO" | site_code == "DMN" | site_code == "END" | site_code == "FKC" | site_code == "FOG" | site_code == "GREN" | site_code == "MCR" | site_code == "MOL" | site_code == "MUSH" | site_code == "OCC" | site_code == "PIRA" | site_code == "SCT" | site_code == "SEA" | site_code == "SHCO" | site_code == "SWC" | site_code == "TRIS")
P.ochra_SSWDtf_over25_fig <- P.ochra_SSWDtf_over25 %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) +
geom_boxplot(fill = "orchid4", alpha = 0.4) +
xlab("Year") +
ylab("log(P.ochra Abundance)")
P.ochra_SSWDtf_over25_fig <- P.ochra_SSWDtf_over25_fig + facet_wrap(~ marine_sort_order)
P.ochra_SSWDtf_over25_fig
ggsave(filename = "../../results/Exploratory_Analysis_results/P.ochra_SSWDtf_over25_fig.png",plot = P.ochra_SSWDtf_over25_fig, width = 5, height = 4)
P.ochra_SSWDtf_highestcount_temp <- P.ochra_SSWD_tf %>% filter(marine_common_year == "2013" & total >= 40)
unique(P.ochra_SSWDtf_highestcount_temp$site_code)
## [1] DMN END FKC FOG GREN MCR OCC SHCO
## 77 Levels: ALEG ARG ARHO ARN BML BOA BOB BOD BRN CARP CAY CHM ... WIN
P.ochra_SSWDtf_over40 <- P.ochra_SSWD_tf %>% filter(site_code == "DMN" | site_code == "END" | site_code == "FKC" | site_code == "FOG" | site_code == "GREN" | site_code == "MCR" | site_code == "OCC" | site_code == "SHCO")
P.ochra_SSWDtf_over40_fig <- P.ochra_SSWDtf_over40 %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) +
geom_boxplot(fill = "orchid4", alpha = 0.4) +
xlab("Year") +
ylab("log(P.ochra Abundance)")
P.ochra_SSWDtf_over40_fig <- P.ochra_SSWDtf_over40_fig + facet_wrap(~ marine_sort_order, nrow =2)
P.ochra_SSWDtf_over40_fig
ggsave(filename = "../../results/Exploratory_Analysis_results/P.ochra_SSWDtf_over40_fig.png",plot = P.ochra_SSWDtf_over40_fig, width = 5, height = 4)
SSWD_timeframe %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year, col=marine_common_year)) + geom_boxplot() + facet_wrap(~state, 1) + theme(legend.position = "none")
SSWD_timeframe %>% ggplot(aes(marine_common_year, log(total), col=state)) + geom_jitter(width = 0.45, alpha = 0.25) +
facet_wrap(~state, 1) + geom_smooth(method = "lm", col="black") + theme(legend.position = "none")
SSWD_timeframe_CA <- filter(SSWD_timeframe, state == "CA")
SSWD_timeframe_CA$marine_common_year <- as.factor(SSWD_timeframe_CA$marine_common_year)
SSWD_timeframe_CA %>% ggplot(aes(marine_common_year, log(total), col=marine_common_year)) + geom_jitter(width = 0.5, alpha = 0.25) + geom_smooth(method = "lm", col="red") + theme(legend.position = "none")
SSWD_timeframe_P.ochra <- filter(P.ochra_only, marine_common_year >= "2013" & marine_common_year <= "2016")
SSWD_timeframe_P.ochra$marine_common_year <- as.factor(SSWD_timeframe_P.ochra$marine_common_year)
SSWD_timeframe_P.ochra %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year, col=marine_common_year)) + geom_boxplot() + facet_wrap(~state, 1) + theme(legend.position = "none")
SSWD_timeframe_K.tuni <- filter(K.tuni_only, marine_common_year >= "2013" & marine_common_year <= "2016")
SSWD_timeframe_K.tuni$marine_common_year <- as.factor(SSWD_timeframe_K.tuni$marine_common_year)
SSWD_timeframe_K.tuni %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year, col=marine_common_year)) + geom_boxplot() + facet_wrap(~state, 1) + theme(legend.position = "none")
SSWD_timeframe_E.tros <- filter(E.tros_only, marine_common_year >= "2013" & marine_common_year <= "2016")
SSWD_timeframe_E.tros$marine_common_year <- as.factor(SSWD_timeframe_E.tros$marine_common_year)
SSWD_timeframe_E.tros %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year, col=marine_common_year)) + geom_boxplot() + facet_wrap(~state, 1) + theme(legend.position = "none")
OH! I didn’t catch that the reason why E.tros had such few survey sightings was because its range appears to only be in Alaska. Speaking of which….
Species <- SSWD_timeframe$species_code
broad_jitter <- ggplot(data = world) +
geom_sf() +
geom_jitter(data= SSWD_timeframe,aes(x=SSWD_timeframe$longitude, y=SSWD_timeframe$latitude,col=Species), size = 2, alpha = 0.3, width = 3) +
annotation_scale(location = "bl", width_hint = 0.3) +
coord_sf(xlim = c(-128, -115), ylim = c(32, 49.5), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude")
narrow_jitter <- ggplot(data = world) +
geom_sf() +
geom_jitter(data= SSWD_timeframe,aes(x=SSWD_timeframe$longitude, y=SSWD_timeframe$latitude,col=Species), size = 2, alpha = 0.3, width = 0.3) +
annotation_scale(location = "bl", width_hint = 0.3) +
coord_sf(xlim = c(-128, -115), ylim = c(32, 49.5), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude")
gridExtra::grid.arrange(broad_jitter,narrow_jitter,nrow=2)
## Scale on map varies by more than 10%, scale bar may be inaccurate
## Scale on map varies by more than 10%, scale bar may be inaccurate
AK_zoom1_SSWD <- ggplot(data = world) +
geom_sf() +
geom_point(data= SSWD_timeframe,aes(x=SSWD_timeframe$longitude, y=SSWD_timeframe$latitude,col=Species,), size = 2, alpha = 0.4) +
annotation_scale(location = "bl", width_hint = 0.3) +
coord_sf(xlim = c(-140, -130), ylim = c(55, 62), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude")
AK_zoom2_SSWD <- ggplot(data = world) +
geom_sf() +
geom_point(data= SSWD_timeframe,aes(x=SSWD_timeframe$longitude, y=SSWD_timeframe$latitude,col=Species,), size = 2, alpha = 0.4) +
annotation_scale(location = "bl", width_hint = 0.3) +
coord_sf(xlim = c(-136, -135), ylim = c(56.6, 57.3), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude")
grid.arrange(AK_zoom1_SSWD, AK_zoom2_SSWD, nrow = 2)
## Scale on map varies by more than 10%, scale bar may be inaccurate